rm(list = ls())
library(tidyverse)
library(dplyr)
library(plyr)
library(readr)
library(stringr)
library(ggplot2)

# ---- Input data ----
# Confounders adjusted for in the step 2 / step 3 models. This has to exist
# before the first step 2 section, which appends it to the Bonferroni-selected
# exposure names.
Confounders <- c("sex", "house", "mshs", "edum2016", "income", "hkm2016", "agemc2016")

# Variable dictionary
dictionary <- read.csv("variables des_20220514.csv", header = TRUE, sep = ",")

# 11 yo BMI + SEP
BMI132_SEP <- read.csv("BMI132_SEP.csv", header = TRUE, stringsAsFactors = TRUE)
# 11 yo BMI + survey1
BMI132_survey1 <- read.csv("BMI132_survey1.csv", header = TRUE, stringsAsFactors = TRUE)

# 11 yo WHR
WHR11yo_survey1_SEP <- read.csv("WHR11yo_survey1_SEP.csv", header = TRUE, stringsAsFactors = TRUE)

# 17.6 yo BMI/WHR (read once; it used to be read twice)
BMIWHR_Biobank <- read.csv("BMIWHR_Biobank.csv", header = TRUE, stringsAsFactors = TRUE)
# The 17.6 yo sections use BMIWHR_Biobank$whr and column positions
# (c(1, 3), c(3, 4), 9:length) that only line up once `whr` has been prepended
# as column 1. Derive it the same way the later re-load of this file does:
# waist = mean of columns 4:5, hip = mean of columns 6:7.
# NOTE(review): confirm this column layout against the CSV.
if (!"whr" %in% names(BMIWHR_Biobank)) {
  mean_waist <- apply(BMIWHR_Biobank[, 4:5], 1, mean)
  mean_hip <- apply(BMIWHR_Biobank[, 6:7], 1, mean)
  whr <- mean_waist / mean_hip
  BMIWHR_Biobank <- cbind(whr, BMIWHR_Biobank)
}
# 11.5 yo =====
#BMI====
#Step 1
# BMI+SEP
# Unadjusted screen: regress BMI at 11.5 yo on each exposure column in turn
# (columns 3 onwards of BMI132_SEP) and keep the slope estimate and p-value.

# Fit bmi_132 ~ as.numeric(x) for one exposure column `x` and return the model
# summary. Factors are turned into their integer level codes by as.numeric().
# Missing values are handled by lm()'s na.action; lm() has no `na.rm`
# argument, so it is not passed. The model is fitted once.
LRM1_coef <- function(x) {
  summary(lm(formula = BMI132_SEP$bmi_132 ~ as.numeric(x), data = BMI132_SEP))
}
lm1 <- lapply(BMI132_SEP[, 3:length(BMI132_SEP)], LRM1_coef)
coeff <- lapply(lm1, coefficients)

# Collect columns 1 and 4 of each coefficient table (estimate, p-value),
# labelled with the exposure name. Rows are prepended, so the table ends up in
# reverse column order.
BMI132SEP.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  BMI132SEP.res <- rbind(res, BMI132SEP.res)
}
# Keep only the exposure slopes. Selecting on the row name (as the step 2
# sections do) stays correct when a model has no estimable slope, unlike
# dropping every second row.
BMI132SEP.res <- BMI132SEP.res[grepl("as.numeric", rownames(BMI132SEP.res)), ]

# BMI+survey1
# Same unadjusted screen for the exposures in BMI132_survey1 (columns 3
# onwards).

# Fit bmi_132 ~ as.numeric(x) for one exposure column `x` and return the model
# summary. lm() has no `na.rm` argument (missing values are handled by its
# na.action), and the model is fitted once.
LRM1_coef <- function(x) {
  summary(lm(formula = BMI132_survey1$bmi_132 ~ as.numeric(x), data = BMI132_survey1))
}
lm1 <- lapply(BMI132_survey1[, 3:length(BMI132_survey1)], LRM1_coef)
coeff <- lapply(lm1, coefficients)

# Estimate and p-value per model; rows are prepended (reverse column order).
BMI132SURVEY1.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  BMI132SURVEY1.res <- rbind(res, BMI132SURVEY1.res)
}
# Keep only the exposure slopes, selected by row name.
BMI132SURVEY1.res <- BMI132SURVEY1.res[grepl("as.numeric", rownames(BMI132SURVEY1.res)), ]
# combine BMI+SEP+survey 1 results
BMI11.res <- rbind(BMI132SEP.res, BMI132SURVEY1.res)

# Bonferroni
# Flag exposures whose unadjusted p-value is below 0.05 / (number of tests).
BMI11.res$Bonf.sig <- 0
nrow(BMI11.res)          # number of tests (printed)
0.05 / nrow(BMI11.res)   # corrected threshold (printed)
BMI11.res[which(BMI11.res$Pr...t.. < 0.05 / nrow(BMI11.res)), "Bonf.sig"] <- 1
sum(BMI11.res$Bonf.sig)  # number of significant exposures (printed)
BMI11.res[which(BMI11.res$Bonf.sig == 1), "names.coeff.i.."]
# Step 1 results restricted to the Bonferroni-significant exposures.
BMI11.Bonf.step1res <- subset(BMI11.res, Bonf.sig == 1)

# Step 2
# Confounder-adjusted models for the Bonferroni-significant exposures.
# `Confounders` (character vector of confounder column names) must be defined
# before this point.
BMI11.Bonf <- BMI11.res$names.coeff.i..[which(BMI11.res$Bonf.sig == 1)]
BMI11.Bonf <- c(BMI11.Bonf, Confounders)
BMI11.Bonf <- unique(BMI11.Bonf)

#exposures from SEP
# Columns 1:2 of BMI132_SEP followed by the selected exposures and confounders.
BMI11.Bonf1 <- cbind(BMI132_SEP[, c(1, 2)], BMI132_SEP[, (names(BMI132_SEP) %in% BMI11.Bonf)])

# Fit the adjusted model for one exposure column `x` and return its summary.
# lm() has no `na.rm` argument (missing values are handled by its na.action),
# and the model is fitted once.
LRM2_coef <- function(x) {
  summary(lm(
    formula = BMI11.Bonf1$bmi_132 ~ as.numeric(x) + sex + house + mshs + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
    data = BMI11.Bonf1
  ))
}
lm2 <- lapply(BMI11.Bonf1[, 3:length(BMI11.Bonf1)], LRM2_coef)
coeff <- lapply(lm2, coefficients)

# Estimate and p-value for every coefficient of every model; rows are
# prepended, so the table is in reverse column order.
BMI11.Bonf1.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  BMI11.Bonf1.res <- rbind(res, BMI11.Bonf1.res)
}
# Keep only the exposure term of each model.
BMI11.Bonf1.step2res <- BMI11.Bonf1.res[grepl("as.numeric", rownames(BMI11.Bonf1.res)), ]

# 'smoke_hyg' and 'smoke_source' should not be adjusted for 'mshs': re-fit
# them without it and overwrite their step 2 rows.
smoking <- c('bmi_132', Confounders, 'smoke_hyg', 'smoke_source')
BMI11.Bonf3 <- BMI11.Bonf1[, (names(BMI11.Bonf1) %in% smoking)]

# Adjusted model without mshs for one column `x`; returns the model summary.
# lm() has no `na.rm` argument, and the model is fitted once.
LRM2_coef <- function(x) {
  summary(lm(
    formula = BMI11.Bonf3$bmi_132 ~ as.numeric(x) + sex + house + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
    data = BMI11.Bonf3
  ))
}
# BMI11.Bonf3 has no id column, so the outcome is column 1 and `3:length`
# would skip a real variable. Iterate over every non-outcome column by name.
lm2 <- lapply(
  BMI11.Bonf3[, setdiff(names(BMI11.Bonf3), "bmi_132"), drop = FALSE],
  LRM2_coef
)
coeff <- lapply(lm2, coefficients)

BMI11.Bonf3.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  BMI11.Bonf3.res <- rbind(res, BMI11.Bonf3.res)
}
BMI11.Bonf3.step2res <- BMI11.Bonf3.res[grepl("as.numeric", rownames(BMI11.Bonf3.res)), ]

# Overwrite the smoking rows by exposure name rather than by hard-coded row
# numbers, which only hold for one particular set of selected exposures.
# A variable that is absent from either table is left alone.
for (smk_var in c("smoke_hyg", "smoke_source")) {
  to_row <- which(BMI11.Bonf1.step2res$names.coeff.i.. == smk_var)
  from_row <- which(BMI11.Bonf3.step2res$names.coeff.i.. == smk_var)
  if (length(to_row) == 1 && length(from_row) == 1) {
    BMI11.Bonf1.step2res[to_row, ] <- BMI11.Bonf3.step2res[from_row, ]
  }
}



#exposures from survey1
# Selected survey1 columns plus the confounders, which are taken from
# BMI132_SEP and joined on id.
BMI11.Bonf2 <- cbind(BMI132_survey1[, c(1, 2)], BMI132_survey1[, (names(BMI132_survey1) %in% BMI11.Bonf)])
BMI11.Bonf2.c <- cbind(BMI132_SEP[, "id"], BMI132_SEP[, (names(BMI132_SEP) %in% Confounders)])
names(BMI11.Bonf2.c)[1] <- "id"
# If survey1 already carries a confounder column (the obesity-risk section of
# this script has to refer to `sex.x` after this same merge), the default
# suffixes rename both copies and `sex` in the formula no longer resolves.
# Keep the survey1 copy unsuffixed and tag the SEP copy instead. With no
# shared column the suffixes are unused and the result is unchanged.
BMI11.Bonf2 <- merge(BMI11.Bonf2, BMI11.Bonf2.c, by = "id", suffixes = c("", ".sep"))

# Adjusted model for one column `x`; returns the model summary. lm() has no
# `na.rm` argument, and the model is fitted once.
LRM2_coef <- function(x) {
  summary(lm(
    formula = BMI11.Bonf2$bmi_132 ~ as.numeric(x) + sex + house + mshs + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
    data = BMI11.Bonf2
  ))
}
lm2 <- lapply(BMI11.Bonf2[, 3:length(BMI11.Bonf2)], LRM2_coef)
coeff <- lapply(lm2, coefficients)

BMI11.Bonf2.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  BMI11.Bonf2.res <- rbind(res, BMI11.Bonf2.res)
}
BMI11.Bonf2.step2res <- BMI11.Bonf2.res[grepl("as.numeric", rownames(BMI11.Bonf2.res)), ]
dim(BMI11.Bonf2.step2res)
# NOTE(review): rows 8 and 9 are picked by position; presumably they are the
# two survey1 exposures (the obesity-risk section selects "q11_2n|q8b" by
# name at the same point). Confirm against the dim() output above.
BMI11.Bonf2.step2res <- BMI11.Bonf2.step2res[c(8, 9), ]

# All step 2 results for BMI at 11.5 yo.
BMI11.Bonf.step2res <- rbind(BMI11.Bonf1.step2res, BMI11.Bonf2.step2res)


#change-in-estimate
# Pair each exposure's unadjusted (.x) and adjusted (.y) estimates and keep
# those whose estimate moved by less than 50% after adjustment.
res.compare <- merge(BMI11.Bonf.step1res, BMI11.Bonf.step2res, by = "names.coeff.i..")
res.compare$change <- with(
  res.compare,
  round(abs((Estimate.x - Estimate.y) / Estimate.x) * 100, 2)
)
BMI11.Bonf.step2 <- res.compare[which(res.compare$change < 50), ]
colnames(BMI11.Bonf.step2) <- c(
  "varnames", "Estimate.step1", "p.step1", "Bonf.sig",
  "Estimate.step2", "p.step2", "change"
)
write.csv(BMI11.Bonf.step2, file = "BMI11_Bonf_step2res_new.csv")

# WHR=====
#Step 1
# Unadjusted screen: regress WHR at 11 yo on each exposure column in turn
# (columns 10 onwards of WHR11yo_survey1_SEP).

# Fit WHR11yo ~ as.numeric(x) for one exposure column `x` and return the model
# summary. lm() has no `na.rm` argument (missing values are handled by its
# na.action), and the model is fitted once.
LRM1_coef <- function(x) {
  summary(lm(formula = WHR11yo_survey1_SEP$WHR11yo ~ as.numeric(x), data = WHR11yo_survey1_SEP))
}
lm1 <- lapply(WHR11yo_survey1_SEP[, 10:length(WHR11yo_survey1_SEP)], LRM1_coef)
coeff <- lapply(lm1, coefficients)

# Estimate and p-value per model; rows are prepended (reverse column order).
WHR11.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  WHR11.res <- rbind(res, WHR11.res)
}
# Keep only the exposure slopes, selected by row name (as in step 2) rather
# than by dropping every second row.
WHR11.res <- WHR11.res[grepl("as.numeric", rownames(WHR11.res)), ]


# Bonferroni
# Flag exposures whose unadjusted p-value is below 0.05 / (number of tests).
WHR11.res$Bonf.sig <- 0
nrow(WHR11.res)          # number of tests (printed)
0.05 / nrow(WHR11.res)   # corrected threshold (printed)
WHR11.res[which(WHR11.res$Pr...t.. < 0.05 / nrow(WHR11.res)), "Bonf.sig"] <- 1
sum(WHR11.res$Bonf.sig)  # number of significant exposures (printed)
WHR11.res[which(WHR11.res$Bonf.sig == 1), "names.coeff.i.."]
# Step 1 results restricted to the Bonferroni-significant exposures.
WHR11.Bonf.step1res <- subset(WHR11.res, Bonf.sig == 1)
# Step 2
# Confounder-adjusted models for the Bonferroni-significant exposures.
# `Confounders` must be defined before this point. WHR11.Bonf first holds the
# selected column names and is then replaced by the analysis data frame.
WHR11.Bonf <- WHR11.res$names.coeff.i..[which(WHR11.res$Bonf.sig == 1)]
WHR11.Bonf <- c(WHR11.Bonf, Confounders)
WHR11.Bonf <- unique(WHR11.Bonf)
WHR11.Bonf <- cbind(WHR11yo_survey1_SEP[, c(2, 3)], WHR11yo_survey1_SEP[, (names(WHR11yo_survey1_SEP) %in% WHR11.Bonf)])

# Adjusted model for one column `x`; returns the model summary. lm() has no
# `na.rm` argument, and the model is fitted once.
LRM2_coef <- function(x) {
  summary(lm(
    formula = WHR11.Bonf$WHR11yo ~ as.numeric(x) + sex + house + mshs + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
    data = WHR11.Bonf
  ))
}
lm2 <- lapply(WHR11.Bonf[, 3:length(WHR11.Bonf)], LRM2_coef)
coeff <- lapply(lm2, coefficients)

# Estimate and p-value for every coefficient; rows are prepended.
WHR11.Bonf.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  WHR11.Bonf.res <- rbind(res, WHR11.Bonf.res)
}
# Keep only the exposure term of each model.
WHR11.Bonf.step2res <- WHR11.Bonf.res[grepl("as.numeric", rownames(WHR11.Bonf.res)), ]

#change-in-estimate
# Pair each exposure's unadjusted (.x) and adjusted (.y) estimates and keep
# those whose estimate moved by less than 50% after adjustment.
res.compare <- merge(WHR11.Bonf.step1res, WHR11.Bonf.step2res, by = "names.coeff.i..")
res.compare$change <- with(
  res.compare,
  round(abs((Estimate.x - Estimate.y) / Estimate.x) * 100, 2)
)
WHR11.Bonf.step2 <- res.compare[which(res.compare$change < 50), ]
colnames(WHR11.Bonf.step2) <- c(
  "varnames", "Estimate.step1", "p.step1", "Bonf.sig",
  "Estimate.step2", "p.step2", "change"
)
write.csv(WHR11.Bonf.step2, file = "WHR11_Bonf_step2res.csv")
#17.6yo======
#BMI==========
#Step 1
# Unadjusted screen: regress BMI at 17.6 yo on each exposure column in turn
# (columns 9 onwards of BMIWHR_Biobank).

# Fit bmi ~ as.numeric(x) for one exposure column `x` and return the model
# summary. lm() has no `na.rm` argument (missing values are handled by its
# na.action), and the model is fitted once.
LRM1_coef <- function(x) {
  summary(lm(formula = BMIWHR_Biobank$bmi ~ as.numeric(x), data = BMIWHR_Biobank))
}
lm1 <- lapply(BMIWHR_Biobank[, 9:length(BMIWHR_Biobank)], LRM1_coef)
coeff <- lapply(lm1, coefficients)

# Estimate and p-value per model; rows are prepended (reverse column order).
BMI17.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  BMI17.res <- rbind(res, BMI17.res)
}
# Keep only the exposure slopes, selected by row name (as in step 2) rather
# than by dropping every second row.
BMI17.res <- BMI17.res[grepl("as.numeric", rownames(BMI17.res)), ]


# Bonferroni
# Flag exposures whose unadjusted p-value is below 0.05 / (number of tests).
BMI17.res$Bonf.sig <- 0
0.05 / nrow(BMI17.res)   # corrected threshold (printed)
BMI17.res[which(BMI17.res$Pr...t.. < 0.05 / nrow(BMI17.res)), "Bonf.sig"] <- 1
sum(BMI17.res$Bonf.sig)  # number of significant exposures (printed)
BMI17.res[which(BMI17.res$Bonf.sig == 1), "names.coeff.i.."]
# Step 1 results restricted to the Bonferroni-significant exposures.
BMI17.Bonf.step1res <- subset(BMI17.res, Bonf.sig == 1)
#Step 2
#Bonferroni
# Confounder-adjusted models for the Bonferroni-significant exposures.
# `Confounders` must be defined before this point. BMI17.Bonf first holds the
# selected column names and is then replaced by the analysis data frame.
BMI17.Bonf <- BMI17.res$names.coeff.i..[which(BMI17.res$Bonf.sig == 1)]
BMI17.Bonf <- c(BMI17.Bonf, Confounders)
BMI17.Bonf <- unique(BMI17.Bonf)
BMI17.Bonf <- cbind(BMIWHR_Biobank[, c(3, 4)], BMIWHR_Biobank[, (names(BMIWHR_Biobank) %in% BMI17.Bonf)])

# Adjusted model for one column `x`; returns the model summary. lm() has no
# `na.rm` argument, and the model is fitted once.
LRM2_coef <- function(x) {
  summary(lm(
    formula = BMI17.Bonf$bmi ~ as.numeric(x) + sex + house + mshs + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
    data = BMI17.Bonf
  ))
}
lm2 <- lapply(BMI17.Bonf[, 3:length(BMI17.Bonf)], LRM2_coef)
coeff <- lapply(lm2, coefficients)

# Estimate and p-value for every coefficient; rows are prepended.
BMI17.Bonf.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  BMI17.Bonf.res <- rbind(res, BMI17.Bonf.res)
}
# Keep only the exposure term of each model.
BMI17.Bonf.step2res <- BMI17.Bonf.res[grepl("as.numeric", rownames(BMI17.Bonf.res)), ]

# exposures: mshs,smoke_hyg, should not adjust for mshs
# These two summaries are only printed; their estimates are not written back
# into BMI17.Bonf.step2res.
summary(lm(
  formula = BMI17.Bonf$bmi ~ sex + house + as.numeric(mshs) + income +
    edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
  data = BMI17.Bonf
))
summary(lm(
  formula = BMI17.Bonf$bmi ~ as.numeric(smoke_hyg) + sex + house + income +
    edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
  data = BMI17.Bonf
))


#change-in-estimate
# Pair each exposure's unadjusted (.x) and adjusted (.y) estimates and keep
# those whose estimate moved by less than 50% after adjustment.
res.compare <- merge(BMI17.Bonf.step1res, BMI17.Bonf.step2res, by = "names.coeff.i..")
res.compare$change <- with(
  res.compare,
  round(abs((Estimate.x - Estimate.y) / Estimate.x) * 100, 2)
)
BMI17.Bonf.step2 <- res.compare[which(res.compare$change < 50), ]
colnames(BMI17.Bonf.step2) <- c(
  "varnames", "Estimate.step1", "p.step1", "Bonf.sig",
  "Estimate.step2", "p.step2", "change"
)
write.csv(BMI17.Bonf.step2, file = "BMI17_Bonf_step2res.csv")
#WHR=====
#Step 1
# Unadjusted screen: regress WHR at 17.6 yo on each exposure column in turn
# (columns 9 onwards of BMIWHR_Biobank). BMIWHR_Biobank must contain a `whr`
# column at this point.

# Fit whr ~ as.numeric(x) for one exposure column `x` and return the model
# summary. lm() has no `na.rm` argument (missing values are handled by its
# na.action), and the model is fitted once.
LRM1_coef <- function(x) {
  summary(lm(formula = BMIWHR_Biobank$whr ~ as.numeric(x), data = BMIWHR_Biobank))
}
lm1 <- lapply(BMIWHR_Biobank[, 9:length(BMIWHR_Biobank)], LRM1_coef)
coeff <- lapply(lm1, coefficients)

# Estimate and p-value per model; rows are prepended (reverse column order).
WHR17.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  WHR17.res <- rbind(res, WHR17.res)
}
# Keep only the exposure slopes, selected by row name (as in step 2) rather
# than by dropping every second row.
WHR17.res <- WHR17.res[grepl("as.numeric", rownames(WHR17.res)), ]

# Bonferroni
# Flag exposures whose unadjusted p-value is below 0.05 / (number of tests).
WHR17.res$Bonf.sig <- 0
0.05 / nrow(WHR17.res)   # corrected threshold (printed)
WHR17.res[which(WHR17.res$Pr...t.. < 0.05 / nrow(WHR17.res)), "Bonf.sig"] <- 1
sum(WHR17.res$Bonf.sig)  # number of significant exposures (printed)
WHR17.res[which(WHR17.res$Bonf.sig == 1), "names.coeff.i.."]
# Step 1 results restricted to the Bonferroni-significant exposures.
WHR17.Bonf.step1res <- subset(WHR17.res, Bonf.sig == 1)
#Step 2
# Confounder-adjusted models for the Bonferroni-significant exposures.
# `Confounders` must be defined before this point. WHR17.Bonf first holds the
# selected column names and is then replaced by the analysis data frame.
WHR17.Bonf <- WHR17.res$names.coeff.i..[which(WHR17.res$Bonf.sig == 1)]
WHR17.Bonf <- c(WHR17.Bonf, Confounders)
WHR17.Bonf <- unique(WHR17.Bonf)
WHR17.Bonf <- cbind(BMIWHR_Biobank[, c(1, 3)], BMIWHR_Biobank[, (names(BMIWHR_Biobank) %in% WHR17.Bonf)])

# Adjusted model for one column `x`; returns the model summary. lm() has no
# `na.rm` argument, and the model is fitted once.
LRM2_coef <- function(x) {
  summary(lm(
    formula = WHR17.Bonf$whr ~ as.numeric(x) + sex + house + mshs + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
    data = WHR17.Bonf
  ))
}
lm2 <- lapply(WHR17.Bonf[, 3:length(WHR17.Bonf)], LRM2_coef)
coeff <- lapply(lm2, coefficients)

# Estimate and p-value for every coefficient; rows are prepended.
WHR17.Bonf.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  WHR17.Bonf.res <- rbind(res, WHR17.Bonf.res)
}
# Keep only the exposure term of each model.
WHR17.Bonf.step2res <- WHR17.Bonf.res[grepl("as.numeric", rownames(WHR17.Bonf.res)), ]

#change-in-estimate
# Pair each exposure's unadjusted (.x) and adjusted (.y) estimates and keep
# those whose estimate moved by less than 50% after adjustment.
res.compare <- merge(WHR17.Bonf.step1res, WHR17.Bonf.step2res, by = "names.coeff.i..")
res.compare$change <- with(
  res.compare,
  round(abs((Estimate.x - Estimate.y) / Estimate.x) * 100, 2)
)
WHR17.Bonf.step2 <- res.compare[which(res.compare$change < 50), ]
colnames(WHR17.Bonf.step2) <- c(
  "varnames", "Estimate.step1", "p.step1", "Bonf.sig",
  "Estimate.step2", "p.step2", "change"
)
write.csv(WHR17.Bonf.step2, file = "WHR17_Bonf_step2res.csv")
# Step 3: follow-up analysis ================

#associations with measures at 23 yo
# Read the follow-up file, name its second column "id" so it can be merged
# with the exposure tables, and derive WHR as waist / hip.
followup <- read.csv("Follow-up data.csv", header = TRUE, sep = ",")
colnames(followup)[2] <- "id"
followup$WHR <- followup$Waist..cm. / followup$Hip..cm.
# Outcome tables: one row per follow-up record, keyed by id.
BMI_new <- subset(followup, select = c("id", "BMI"))
WHR_new <- subset(followup, select = c("id", "WHR"))

# BMI=====
# at 11.5 yo
# Step 3 for the exposures retained at 11.5 yo: re-estimate each adjusted
# association with BMI from the follow-up file as the outcome.
BMI.Bonf.fu <- c(Confounders, BMI11.Bonf.step2$varnames, "id")
BMI.Bonf.fu <- unique(BMI.Bonf.fu)
# The column mask is built from names(BMI132_SEP), so it has to be applied to
# BMI132_SEP. Applying it to a different data frame does not select the named
# columns.
# NOTE(review): confirm BMI132_SEP (rather than BMIWHR_Biobank) is the
# intended source of the SEP exposures here.
BMI.Bonf.fu1 <- BMI132_SEP[, (names(BMI132_SEP) %in% BMI.Bonf.fu)]
BMI_Bonf_verify1 <- merge(BMI_new, BMI.Bonf.fu1, by = "id")
BMI_Bonf_verify1$sex <- as.numeric(BMI_Bonf_verify1$sex)
BMI.Bonf.fu2 <- BMI132_survey1[, (names(BMI132_survey1) %in% BMI.Bonf.fu)]
BMI_Bonf_verify2 <- merge(BMI_new, BMI.Bonf.fu2, by = "id")
# NOTE(review): no `by` is given, so this joins on every column the two
# tables share (at least id and BMI). If survey1 also carries a confounder
# such as sex, it becomes a join key too - verify this is intended.
BMI_Bonf_verify <- merge(BMI_Bonf_verify1, BMI_Bonf_verify2, all.x = TRUE)
str(BMI_Bonf_verify)

# Adjusted follow-up model for one column `x`; returns the model summary.
# lm() has no `na.rm` argument, and the model is fitted once.
LRM3_coef <- function(x) {
  summary(lm(
    formula = BMI_Bonf_verify$BMI ~ as.numeric(x) + sex + house + mshs + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
    data = BMI_Bonf_verify
  ))
}
lm3 <- lapply(BMI_Bonf_verify[, 3:length(BMI_Bonf_verify)], LRM3_coef)
coeff <- lapply(lm3, coefficients)


# Estimate and p-value for every coefficient; rows are prepended.
BMI_Bonf_verify.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  BMI_Bonf_verify.res <- rbind(res, BMI_Bonf_verify.res)
}
BMI_Bonf_verify.res <- BMI_Bonf_verify.res[grepl("as.numeric", rownames(BMI_Bonf_verify.res)), ]
names(BMI_Bonf_verify.res) <- c("varnames", "Estimate.fu", "p.fu")

# group of smoking exposures
# Re-fit mshs, smoke_source and smoke_hyg without adjusting for mshs; the
# second coefficient row of each model is the exposure term.
smk1 <- summary(lm(
  formula = BMI_Bonf_verify$BMI ~ as.numeric(mshs) + sex + house + income +
    edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
  data = BMI_Bonf_verify
))
coef(smk1)[2, c(1, 4)]
smk2 <- summary(lm(
  formula = BMI_Bonf_verify$BMI ~ smoke_source + sex + house + income +
    edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
  data = BMI_Bonf_verify
))
coef(smk2)[2, c(1, 4)]
smk3 <- summary(lm(
  formula = BMI_Bonf_verify$BMI ~ smoke_hyg + sex + house + income +
    edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
  data = BMI_Bonf_verify
))
coef(smk3)[2, c(1, 4)]
smk <- rbind(coef(smk1)[2, c(1, 4)], coef(smk2)[2, c(1, 4)], coef(smk3)[2, c(1, 4)])
# Write each re-fitted estimate into the row of its own variable. Fixed row
# numbers depend on which exposures happened to be selected.
smk_rows <- match(c("mshs", "smoke_source", "smoke_hyg"), BMI_Bonf_verify.res$varnames)
smk_found <- !is.na(smk_rows)
if (any(smk_found)) {
  BMI_Bonf_verify.res[smk_rows[smk_found], c(2:3)] <- smk[smk_found, , drop = FALSE]
}

BMI11.Bonf.fu <- merge(BMI11.Bonf.step2, BMI_Bonf_verify.res)
# Direction of agreement between the step 2 and follow-up estimates:
# 1 = same sign, -1 = opposite sign, NA = product is zero or missing.
# The column is created at full length first: assigning through
# `$dir[which(...)]` into a column that does not exist yet yields a vector
# only as long as the largest matched index, which then errors or is recycled.
BMI11.Bonf.fu$dir <- rep(NA_real_, nrow(BMI11.Bonf.fu))
BMI11.Bonf.fu$dir[which(BMI11.Bonf.fu$Estimate.step2 * BMI11.Bonf.fu$Estimate.fu > 0)] <- 1
BMI11.Bonf.fu$dir[which(BMI11.Bonf.fu$Estimate.step2 * BMI11.Bonf.fu$Estimate.fu < 0)] <- -1
write.csv(BMI11.Bonf.fu, "BMI11Bonf_fu.csv")
# at 17.6 yo
# Step 3 for the exposures retained at 17.6 yo: re-estimate each adjusted
# association with BMI from the follow-up file as the outcome.
BMI.Bonf.fu <- c(Confounders, BMI17.Bonf.step2$varnames, "id")
BMI.Bonf.fu <- unique(BMI.Bonf.fu)
BMI.Bonf.fu <- BMIWHR_Biobank[, (names(BMIWHR_Biobank) %in% BMI.Bonf.fu)]
BMI_Bonf_verify <- merge(BMI_new, BMI.Bonf.fu, by = "id")
BMI_Bonf_verify$sex <- as.numeric(BMI_Bonf_verify$sex)
str(BMI_Bonf_verify)

# Adjusted follow-up model for one column `x`; returns the model summary.
# lm() has no `na.rm` argument, and the model is fitted once.
LRM3_coef <- function(x) {
  summary(lm(
    formula = BMI_Bonf_verify$BMI ~ as.numeric(x) + sex + house + mshs + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
    data = BMI_Bonf_verify
  ))
}
lm3 <- lapply(BMI_Bonf_verify[, 3:length(BMI_Bonf_verify)], LRM3_coef)
coeff <- lapply(lm3, coefficients)


# Estimate and p-value for every coefficient; rows are prepended.
BMI_Bonf_verify.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  BMI_Bonf_verify.res <- rbind(res, BMI_Bonf_verify.res)
}
BMI_Bonf_verify.res <- BMI_Bonf_verify.res[grepl("as.numeric", rownames(BMI_Bonf_verify.res)), ]
names(BMI_Bonf_verify.res) <- c("varnames", "Estimate.fu", "p.fu")
BMI17.Bonf.fu <- merge(BMI17.Bonf.step2, BMI_Bonf_verify.res)
# Direction of agreement between the step 2 and follow-up estimates:
# 1 = same sign, -1 = opposite sign, NA = product is zero or missing.
# The column is created at full length first: assigning through
# `$dir[which(...)]` into a column that does not exist yet yields a vector
# only as long as the largest matched index, which then errors or is recycled.
BMI17.Bonf.fu$dir <- rep(NA_real_, nrow(BMI17.Bonf.fu))
BMI17.Bonf.fu$dir[which(BMI17.Bonf.fu$Estimate.step2 * BMI17.Bonf.fu$Estimate.fu > 0)] <- 1
BMI17.Bonf.fu$dir[which(BMI17.Bonf.fu$Estimate.step2 * BMI17.Bonf.fu$Estimate.fu < 0)] <- -1
write.csv(BMI17.Bonf.fu, "BMI17Bonf_fu.csv")

# WHR =====

# Bonferroni
# at 11.5 yo
# Step 3 for the exposures retained at 11.5 yo: re-estimate each adjusted
# association with WHR from the follow-up file as the outcome.
WHR.Bonf.fu <- c(Confounders, WHR11.Bonf.step2$varnames, "id")
WHR.Bonf.fu <- unique(WHR.Bonf.fu)
WHR.Bonf.fu <- WHR11yo_survey1_SEP[, (names(WHR11yo_survey1_SEP) %in% WHR.Bonf.fu)]
WHR_Bonf_verify <- merge(WHR_new, WHR.Bonf.fu, by = "id")
WHR_Bonf_verify$sex <- as.numeric(WHR_Bonf_verify$sex)
str(WHR_Bonf_verify)

# Adjusted follow-up model for one column `x`; returns the model summary.
# lm() has no `na.rm` argument, and the model is fitted once.
LRM3_coef <- function(x) {
  summary(lm(
    formula = WHR_Bonf_verify$WHR ~ as.numeric(x) + sex + house + mshs + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
    data = WHR_Bonf_verify
  ))
}
lm3 <- lapply(WHR_Bonf_verify[, 3:length(WHR_Bonf_verify)], LRM3_coef)
coeff <- lapply(lm3, coefficients)


# Estimate and p-value for every coefficient; rows are prepended.
WHR_Bonf_verify.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  WHR_Bonf_verify.res <- rbind(res, WHR_Bonf_verify.res)
}
WHR_Bonf_verify.res <- WHR_Bonf_verify.res[grepl("as.numeric", rownames(WHR_Bonf_verify.res)), ]
names(WHR_Bonf_verify.res) <- c("varnames", "Estimate.fu", "p.fu")
#mshs
# mshs as the exposure (not additionally adjusted for itself); the second
# coefficient row is the exposure term.
mshs <- summary(lm(
  formula = WHR_Bonf_verify$WHR ~ as.numeric(mshs) + sex + house + income +
    edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
  data = WHR_Bonf_verify
))
mshs <- coef(mshs)[2, c(1, 4)]
# Write the estimate into the mshs row, located by name. A fixed row number
# depends on which exposures happened to be selected.
mshs_row <- which(WHR_Bonf_verify.res$varnames == "mshs")
if (length(mshs_row) == 1) {
  WHR_Bonf_verify.res[mshs_row, c(2:3)] <- mshs
}

#house
# Printed for inspection only; not written into the results table.
house <- summary(lm(
  formula = WHR_Bonf_verify$WHR ~ house + sex + mshs + income +
    edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
  data = WHR_Bonf_verify
))
house
WHR11.Bonf.fu <- merge(WHR11.Bonf.step2, WHR_Bonf_verify.res)
# Direction of agreement between the step 2 and follow-up estimates:
# 1 = same sign, -1 = opposite sign, NA = product is zero or missing.
# The column is created at full length first: assigning through
# `$dir[which(...)]` into a column that does not exist yet yields a vector
# only as long as the largest matched index, which then errors or is recycled.
WHR11.Bonf.fu$dir <- rep(NA_real_, nrow(WHR11.Bonf.fu))
WHR11.Bonf.fu$dir[which(WHR11.Bonf.fu$Estimate.step2 * WHR11.Bonf.fu$Estimate.fu > 0)] <- 1
WHR11.Bonf.fu$dir[which(WHR11.Bonf.fu$Estimate.step2 * WHR11.Bonf.fu$Estimate.fu < 0)] <- -1
write.csv(WHR11.Bonf.fu, "WHR11Bonf_fu.csv")


# at 17yo
# Step 3 for the exposures retained at 17.6 yo: re-estimate each adjusted
# association with WHR from the follow-up file as the outcome.
WHR.Bonf.fu <- c(Confounders, WHR17.Bonf.step2$varnames, "id")
WHR.Bonf.fu <- unique(WHR.Bonf.fu)
WHR.Bonf.fu <- BMIWHR_Biobank[, (names(BMIWHR_Biobank) %in% WHR.Bonf.fu)]
WHR_Bonf_verify <- merge(WHR_new, WHR.Bonf.fu, by = "id")
WHR_Bonf_verify$sex <- as.numeric(WHR_Bonf_verify$sex)
str(WHR_Bonf_verify)

# Adjusted follow-up model for one column `x`; returns the model summary.
# lm() has no `na.rm` argument, and the model is fitted once.
LRM3_coef <- function(x) {
  summary(lm(
    formula = WHR_Bonf_verify$WHR ~ as.numeric(x) + sex + house + mshs + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
    data = WHR_Bonf_verify
  ))
}
lm3 <- lapply(WHR_Bonf_verify[, 3:length(WHR_Bonf_verify)], LRM3_coef)
coeff <- lapply(lm3, coefficients)

#house
# Printed for inspection only; not written into the results table.
house <- summary(lm(
  formula = WHR_Bonf_verify$WHR ~ house + sex + mshs + income +
    edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
  data = WHR_Bonf_verify
))
house

# Estimate and p-value for every coefficient; rows are prepended.
WHR_Bonf_verify.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  WHR_Bonf_verify.res <- rbind(res, WHR_Bonf_verify.res)
}
WHR_Bonf_verify.res <- WHR_Bonf_verify.res[grepl("as.numeric", rownames(WHR_Bonf_verify.res)), ]
names(WHR_Bonf_verify.res) <- c("varnames", "Estimate.fu", "p.fu")
WHR17.Bonf.fu <- merge(WHR17.Bonf.step2, WHR_Bonf_verify.res)
# Direction of agreement between the step 2 and follow-up estimates:
# 1 = same sign, -1 = opposite sign, NA = product is zero or missing.
# The column is created at full length first: assigning through
# `$dir[which(...)]` into a column that does not exist yet yields a vector
# only as long as the largest matched index, which then errors or is recycled.
WHR17.Bonf.fu$dir <- rep(NA_real_, nrow(WHR17.Bonf.fu))
WHR17.Bonf.fu$dir[which(WHR17.Bonf.fu$Estimate.step2 * WHR17.Bonf.fu$Estimate.fu > 0)] <- 1
WHR17.Bonf.fu$dir[which(WHR17.Bonf.fu$Estimate.step2 * WHR17.Bonf.fu$Estimate.fu < 0)] <- -1
write.csv(WHR17.Bonf.fu, "WHR17Bonf_fu.csv")
#associations of exposures identified at 11.5 yo with measures at 17.6 yo
#outcomes at 17.6 yo
# Re-read the 17.6 yo data and prepend WHR, computed per row as
# mean(columns 4:5) / mean(columns 6:7).
BMIWHR_Biobank <- read.csv(file = "BMIWHR_Biobank.csv", header = TRUE, stringsAsFactors = TRUE)
mean_waist <- apply(X = BMIWHR_Biobank[, 4:5], MARGIN = 1, FUN = mean)
mean_hip <- apply(X = BMIWHR_Biobank[, 6:7], MARGIN = 1, FUN = mean)
whr <- mean_waist / mean_hip
BMIWHR_Biobank <- cbind(whr, BMIWHR_Biobank)

#exposures at 11.5 yo
# Step 2 result files written earlier, each left-joined to the exposure
# annotation table on the variable name.
annotation <- read.csv(file = "exposure_category_for tables.csv")
BMI11.Bonf.step2 <- read.csv(file = 'BMI11_Bonf_step2res_new.csv', header = TRUE)
BMI11.Bonf.step2 <- merge(
  x = BMI11.Bonf.step2, y = annotation,
  by.x = "varnames", by.y = "VarNames", all.x = TRUE
)
WHR11.Bonf.step2 <- read.csv(file = 'WHR11_Bonf_step2res.csv', header = TRUE)
WHR11.Bonf.step2 <- merge(
  x = WHR11.Bonf.step2, y = annotation,
  by.x = "varnames", by.y = "VarNames", all.x = TRUE
)

# Confounder columns adjusted for in the models below.
Confounders <- c("sex", "house", "mshs", "edum2016", "income", "hkm2016", "agemc2016")

# BMI=====
# Exposures retained at 11.5 yo, tested against BMI at 17.6 yo.
BMI.Bonf.fu <- c("id", "bmi", Confounders, BMI11.Bonf.step2$varnames)
BMI.Bonf.fu <- unique(BMI.Bonf.fu)
BMI.Bonf.fu2 <- BMI132_survey1[, (names(BMI132_survey1) %in% BMI.Bonf.fu)]
BMI_Bonf_verify1 <- BMIWHR_Biobank[, (names(BMIWHR_Biobank) %in% BMI.Bonf.fu)]
BMI_Bonf_verify2 <- merge(BMIWHR_Biobank[, c(3:4)], BMI.Bonf.fu2, by = "id")
# Both inputs carry `bmi`, so the merge yields bmi.x (from BMI_Bonf_verify1)
# and bmi.y; bmi.x is the outcome used below.
BMI_Bonf_verify <- merge(BMI_Bonf_verify1, BMI_Bonf_verify2, by = "id", all.x = TRUE)
BMI_Bonf_verify$sex <- as.numeric(BMI_Bonf_verify$sex)
str(BMI_Bonf_verify)


# Adjusted model for one column `x`; returns the model summary. lm() has no
# `na.rm` argument, and the model is fitted once.
LRM3_coef <- function(x) {
  summary(lm(
    formula = BMI_Bonf_verify$bmi.x ~ as.numeric(x) + sex + house + mshs + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
    data = BMI_Bonf_verify
  ))
}
lm3 <- lapply(BMI_Bonf_verify[, 3:length(BMI_Bonf_verify)], LRM3_coef)
coeff <- lapply(lm3, coefficients)


# Estimate and p-value for every coefficient; rows are prepended.
BMI_Bonf_verify.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  BMI_Bonf_verify.res <- rbind(res, BMI_Bonf_verify.res)
}

BMI_Bonf_verify.res <- BMI_Bonf_verify.res[grepl("as.numeric", rownames(BMI_Bonf_verify.res)), ]
names(BMI_Bonf_verify.res) <- c("varnames", "Estimate.fu", "p.fu")

# group of smoking exposures
# Re-fit mshs, smoke_source and smoke_hyg without adjusting for mshs; the
# second coefficient row of each model is the exposure term.
smk1 <- summary(lm(
  formula = BMI_Bonf_verify$bmi.x ~ as.numeric(mshs) + sex + house + income +
    edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
  data = BMI_Bonf_verify
))
coef(smk1)[2, c(1, 4)]
smk2 <- summary(lm(
  formula = BMI_Bonf_verify$bmi.x ~ smoke_source + sex + house + income +
    edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
  data = BMI_Bonf_verify
))
coef(smk2)[2, c(1, 4)]
smk3 <- summary(lm(
  formula = BMI_Bonf_verify$bmi.x ~ smoke_hyg + sex + house + income +
    edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
  data = BMI_Bonf_verify
))
coef(smk3)[2, c(1, 4)]
smk <- rbind(coef(smk1)[2, c(1, 4)], coef(smk2)[2, c(1, 4)], coef(smk3)[2, c(1, 4)])
# Write each re-fitted estimate into the row of its own variable. Fixed row
# numbers depend on which exposures happened to be selected.
smk_rows <- match(c("mshs", "smoke_source", "smoke_hyg"), BMI_Bonf_verify.res$varnames)
smk_found <- !is.na(smk_rows)
if (any(smk_found)) {
  BMI_Bonf_verify.res[smk_rows[smk_found], c(2:3)] <- smk[smk_found, , drop = FALSE]
}

BMI11.Bonf.fu <- merge(BMI11.Bonf.step2, BMI_Bonf_verify.res)
# Direction of agreement between the step 2 and 17.6 yo estimates:
# 1 = same sign, -1 = opposite sign, NA = product is zero or missing.
# The column is created at full length first: assigning through
# `$dir[which(...)]` into a column that does not exist yet yields a vector
# only as long as the largest matched index, which then errors or is recycled.
BMI11.Bonf.fu$dir <- rep(NA_real_, nrow(BMI11.Bonf.fu))
BMI11.Bonf.fu$dir[which(BMI11.Bonf.fu$Estimate.step2 * BMI11.Bonf.fu$Estimate.fu > 0)] <- 1
BMI11.Bonf.fu$dir[which(BMI11.Bonf.fu$Estimate.step2 * BMI11.Bonf.fu$Estimate.fu < 0)] <- -1
write.csv(BMI11.Bonf.fu, "BMI17exposure11_verify.csv")

# WHR =====
# Exposures retained at 11.5 yo, tested against WHR at 17.6 yo.
WHR.Bonf.fu <- c("id", "whr", Confounders, WHR11.Bonf.step2$varnames)
WHR.Bonf.fu <- unique(WHR.Bonf.fu)
WHR.Bonf.fu2 <- WHR11yo_survey1_SEP[, (names(WHR11yo_survey1_SEP) %in% WHR.Bonf.fu)]
WHR_Bonf_verify1 <- BMIWHR_Biobank[, (names(BMIWHR_Biobank) %in% WHR.Bonf.fu)]
WHR_Bonf_verify2 <- merge(BMIWHR_Biobank[, c(1, 3)], WHR.Bonf.fu2, by = "id")
# Only the first three columns of the merged table are carried forward.
WHR_Bonf_verify2 <- WHR_Bonf_verify2[, c(1:3)]
# Both inputs carry `whr`, so the merge yields whr.x (from WHR_Bonf_verify1)
# and whr.y; whr.x is the outcome used below.
WHR_Bonf_verify <- merge(WHR_Bonf_verify1, WHR_Bonf_verify2, by = "id", all.x = TRUE)
WHR_Bonf_verify$sex <- as.numeric(WHR_Bonf_verify$sex)
str(WHR_Bonf_verify)

# Adjusted model for one column `x`; returns the model summary. lm() has no
# `na.rm` argument, and the model is fitted once.
LRM3_coef <- function(x) {
  summary(lm(
    formula = WHR_Bonf_verify$whr.x ~ as.numeric(x) + sex + house + mshs + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
    data = WHR_Bonf_verify
  ))
}
lm3 <- lapply(WHR_Bonf_verify[, 3:length(WHR_Bonf_verify)], LRM3_coef)
coeff <- lapply(lm3, coefficients)


# Estimate and p-value for every coefficient; rows are prepended.
WHR_Bonf_verify.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  WHR_Bonf_verify.res <- rbind(res, WHR_Bonf_verify.res)
}
WHR_Bonf_verify.res <- WHR_Bonf_verify.res[grepl("as.numeric", rownames(WHR_Bonf_verify.res)), ]
names(WHR_Bonf_verify.res) <- c("varnames", "Estimate.fu", "p.fu")
#mshs
# mshs as the exposure (not additionally adjusted for itself); the second
# coefficient row is the exposure term.
mshs <- summary(lm(
  formula = WHR_Bonf_verify$whr.x ~ as.numeric(mshs) + sex + house + income +
    edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
  data = WHR_Bonf_verify
))
mshs <- coef(mshs)[2, c(1, 4)]
# Rows are located by variable name; fixed row numbers depend on which
# exposures happened to be selected.
mshs_row <- which(WHR_Bonf_verify.res$varnames == "mshs")
if (length(mshs_row) == 1) {
  WHR_Bonf_verify.res[mshs_row, c(2:3)] <- mshs
}

#house
# house entered as a factor; the second coefficient row (its first non-
# reference level) is what gets stored.
house <- summary(lm(
  formula = WHR_Bonf_verify$whr.x ~ as.factor(house) + sex + mshs + income +
    edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
  data = WHR_Bonf_verify
))
house_row <- which(WHR_Bonf_verify.res$varnames == "house")
if (length(house_row) == 1) {
  WHR_Bonf_verify.res[house_row, c(2:3)] <- house$coefficients[2, c(1, 4)]
}
WHR11.Bonf.fu <- merge(WHR11.Bonf.step2, WHR_Bonf_verify.res)
# Direction of agreement between the step 2 and 17.6 yo estimates:
# 1 = same sign, -1 = opposite sign, NA = product is zero or missing.
# The column is created at full length first: assigning through
# `$dir[which(...)]` into a column that does not exist yet yields a vector
# only as long as the largest matched index, which then errors or is recycled.
WHR11.Bonf.fu$dir <- rep(NA_real_, nrow(WHR11.Bonf.fu))
WHR11.Bonf.fu$dir[which(WHR11.Bonf.fu$Estimate.step2 * WHR11.Bonf.fu$Estimate.fu > 0)] <- 1
WHR11.Bonf.fu$dir[which(WHR11.Bonf.fu$Estimate.step2 * WHR11.Bonf.fu$Estimate.fu < 0)] <- -1
write.csv(WHR11.Bonf.fu, "WHR17exposure11_verify.csv")

#convert BMI to obesity risk
# This part starts from an empty workspace and reloads what it needs.
rm(list = ls())
dictionary <- read.csv("variables des_20220514.csv", header = TRUE, sep = ",")
Confounders <- c("sex", "house", "mshs", "edum2016", "income", "hkm2016", "agemc2016")
annotation <- read.csv("exposure_category_for tables.csv") # exposure annotation for tables


#11yo BMI + SEP
BMI132_SEP <- read.csv("BMI132_SEP.csv", header = TRUE, stringsAsFactors = TRUE)
# binary variable for bmi: 1 when bmi_132 is at or above the sex-specific
# cut-off (20.89 for "M", 21.2 for "F"), otherwise 0. The earlier 'NA' string
# placeholder was overwritten immediately and is not needed.
BMI132_SEP$bmi_bin <- ifelse(
  (BMI132_SEP$sex == "M" & BMI132_SEP$bmi_132 >= 20.89) |
    (BMI132_SEP$sex == "F" & BMI132_SEP$bmi_132 >= 21.2),
  1, 0
)



# 11yo BMI + survey1
BMI132_survey1 <- read.csv("BMI132_survey1.csv", header = TRUE, stringsAsFactors = TRUE)
# binary variable for bmi, same cut-offs as above
BMI132_survey1$bmi_bin <- ifelse(
  (BMI132_survey1$sex == "M" & BMI132_survey1$bmi_132 >= 20.89) |
    (BMI132_survey1$sex == "F" & BMI132_survey1$bmi_132 >= 21.2),
  1, 0
)


# 17.6 yo BMIWHR
BMIWHR_Biobank <- read.csv("BMIWHR_Biobank.csv", header = TRUE, stringsAsFactors = TRUE)  #442 exposures
# NOTE(review): the two lines that followed here recomputed
# BMI132_SEP$bmi_bin a second time (a no-op) instead of creating a binary BMI
# variable for BMIWHR_Biobank. The 17.6 yo cut-offs are not given anywhere in
# this script, so no bmi_bin is derived for BMIWHR_Biobank - add it if needed.

# 11.5 yo =====
# results for selected exposures from step 1 using BMI as a continuous outcome
# NOTE(review): nothing visible in this script writes BMI11.res.csv or
# BMI17.res.csv; they presumably have to be exported from the step 1 results
# beforehand - confirm.
BMI11.res <- read.csv("BMI11.res.csv", header = TRUE)
BMI17.res <- read.csv("BMI17.res.csv", header = TRUE)


# 1) converted BMI into binary variables for evaluating general obesity risk (Logistic regression) =======
# 11.5 yo =====
#selected exposures from step 1 using BMI as a categorical outcome

BMI11.Bonf <- BMI11.res$names.coeff.i..[which(BMI11.res$Bonf.sig == 1)]
BMI11.Bonf <- c(BMI11.Bonf, Confounders)
BMI11.Bonf <- unique(BMI11.Bonf)
# id and the binary outcome first, then the selected exposures/confounders.
BMI11.Bonf1 <- cbind(BMI132_SEP[, c("id", "bmi_bin")], BMI132_SEP[, (names(BMI132_SEP) %in% BMI11.Bonf)])
BMI11.Bonf2 <- cbind(BMI132_survey1[, c("id", "bmi_bin")], BMI132_survey1[, (names(BMI132_survey1) %in% BMI11.Bonf)])

# Adjusted logistic model for one column `x`; returns the model summary.
# The model is fitted once.
LRM2_coef <- function(x) {
  summary(glm(
    formula = BMI11.Bonf1$bmi_bin ~ as.numeric(x) + sex + house + mshs + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
    data = BMI11.Bonf1, family = 'binomial'
  ))
}
# Exposures start at column 3: column 1 is id and column 2 is the outcome
# bmi_bin. Starting at 2 regressed the outcome on itself and added a bogus
# "bmi_bin" row to the results.
lm2 <- lapply(BMI11.Bonf1[, 3:length(BMI11.Bonf1)], LRM2_coef)
coeff <- lapply(lm2, coefficients)
# Estimate and p-value for every coefficient; rows are prepended.
BMI11.Bonf1.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  BMI11.Bonf1.res <- rbind(res, BMI11.Bonf1.res)
}
# Keep only the exposure term of each model.
BMI11.Bonf1.step2res <- BMI11.Bonf1.res[grepl("as.numeric", rownames(BMI11.Bonf1.res)), ]



# replace 'smoke_hyg','smoke_source','q29' because they should not adjust for 'mshs'
smoking <- c('bmi_bin', Confounders, 'smoke_hyg', 'smoke_source', 'q29')
BMI11.Bonf3 <- BMI11.Bonf1[, (names(BMI11.Bonf1) %in% smoking)]

# Adjusted logistic model without mshs for one column `x`; returns the model
# summary. The model is fitted once.
LRM2_coef <- function(x) {
  summary(glm(
    formula = BMI11.Bonf3$bmi_bin ~ as.numeric(x) + sex + house + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
    data = BMI11.Bonf3, family = 'binomial'
  ))
}
# BMI11.Bonf3 has no id column, so the outcome is column 1 and `3:length`
# would skip a real variable. Iterate over every non-outcome column by name.
lm2 <- lapply(
  BMI11.Bonf3[, setdiff(names(BMI11.Bonf3), "bmi_bin"), drop = FALSE],
  LRM2_coef
)
coeff <- lapply(lm2, coefficients)

BMI11.Bonf3.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names(coeff[i]), res)
  BMI11.Bonf3.res <- rbind(res, BMI11.Bonf3.res)
}
BMI11.Bonf3.step2res <- BMI11.Bonf3.res[grepl("as.numeric", rownames(BMI11.Bonf3.res)), ]

# Overwrite the rows of the three exposures with their mshs-free estimates.
# Names are matched exactly: a substring match could hit several variables
# and overwrite the wrong rows. A variable missing from either table is left
# untouched.
for (smk_var in c("q29", "smoke_source", "smoke_hyg")) {
  to_row <- which(BMI11.Bonf1.step2res$names.coeff.i.. == smk_var)
  from_row <- which(BMI11.Bonf3.step2res$names.coeff.i.. == smk_var)
  if (length(to_row) == 1 && length(from_row) == 1) {
    BMI11.Bonf1.step2res[to_row, ] <- BMI11.Bonf3.step2res[from_row, ]
  }
}


#exposures from survey1
# Outcome and the variables listed in BMI11.Bonf are taken from BMI132_survey1;
# the confounders are taken from BMI132_SEP and joined on "id".
BMI11.Bonf2 <- cbind(BMI132_survey1[, c("id", "bmi_bin")], BMI132_survey1[, (names(BMI132_survey1) %in% BMI11.Bonf)])
BMI11.Bonf2.c <- cbind(BMI132_SEP[, "id"], BMI132_SEP[, (names(BMI132_SEP) %in% Confounders)])
names(BMI11.Bonf2)[1] <- "id"
names(BMI11.Bonf2.c)[1] <- "id"
BMI11.Bonf2 <- merge(BMI11.Bonf2, BMI11.Bonf2.c, by = "id")

# Logistic regression of bmi_bin on one column `x` (coerced to numeric) plus
# the confounders. Returns the glm summary. Fitted once instead of twice.
# NOTE(review): `sex.x` is presumably the suffixed copy of `sex` produced by
# the merge above; confirm which table it comes from.
LRM2_coef <- function(x) {
  summary(glm(
    formula = BMI11.Bonf2$bmi_bin ~ as.numeric(x) + sex.x + house + mshs + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
    data = BMI11.Bonf2, family = 'binomial'
  ))
}
lm2 <- lapply(BMI11.Bonf2[, 3:length(BMI11.Bonf2)], LRM2_coef)
coeff <- lapply(lm2, coefficients)

# Stack estimate (column 1) and p-value (column 4); newest table goes on top.
BMI11.Bonf2.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names.coeff.i.. = names(coeff)[i], res)
  BMI11.Bonf2.res <- rbind(res, BMI11.Bonf2.res)
}
BMI11.Bonf2.step2res <- BMI11.Bonf2.res[grepl("as.numeric", rownames(BMI11.Bonf2.res)), ]
dim(BMI11.Bonf2.step2res)
# Only the q11_2n* and q8b* exposures are kept from the survey1 models.
BMI11.Bonf2.step2res <- BMI11.Bonf2.step2res[grepl("q11_2n|q8b", BMI11.Bonf2.step2res$names.coeff.i..), ]


BMI11.Bonf.step2 <- rbind(BMI11.Bonf1.step2res, BMI11.Bonf2.step2res)
names(BMI11.Bonf.step2) <- c("varnames", "Estimate.step2", "p.step2")
BMI11.Bonf.step2 <- merge(BMI11.Bonf.step2, annotation, by.x = "varnames", by.y = "VarNames", all.x = TRUE)
# calculate OR and 95%CI
# The SE is back-calculated from the p-value: z is approximated as
# -0.862 + sqrt(0.743 - 2.404 * log(p)) and SE = |estimate| / z.
BMI11.Bonf.step2$SE <- abs(BMI11.Bonf.step2$Estimate.step2) / (-0.862 + sqrt(0.743 - 2.404 * log(BMI11.Bonf.step2$p.step2))) #se of beta on log scale
BMI11.Bonf.step2$b.lb <- BMI11.Bonf.step2$Estimate.step2 - 1.96 * BMI11.Bonf.step2$SE
BMI11.Bonf.step2$b.ub <- BMI11.Bonf.step2$Estimate.step2 + 1.96 * BMI11.Bonf.step2$SE
BMI11.Bonf.step2$OR <- round(exp(BMI11.Bonf.step2$Estimate.step2), 3) #beta = log(OR)
BMI11.Bonf.step2$OR.LB <- round(exp(BMI11.Bonf.step2$b.lb), 3)
BMI11.Bonf.step2$OR.UB <- round(exp(BMI11.Bonf.step2$b.ub), 3)
# OR, OR.LB and OR.UB are already rounded to 3 decimals, so no second round().
BMI11.Bonf.step2$CI <- paste0(BMI11.Bonf.step2$OR, " (", BMI11.Bonf.step2$OR.LB, ", ", BMI11.Bonf.step2$OR.UB, ")")

write.csv(BMI11.Bonf.step2, 'BMI11_logreg_step2res.csv')


# 17.6 yo======
#selected exposures from step 1 using BMI as a categorical outcome 

# Variables flagged Bonf.sig == 1 in BMI17.res, plus the confounders.
BMI17.Bonf <- BMI17.res$names.coeff.i..[which(BMI17.res$Bonf.sig == 1)]
BMI17.Bonf <- c(BMI17.Bonf, Confounders)
BMI17.Bonf <- unique(BMI17.Bonf)
# From here on BMI17.Bonf is the analysis data frame: id, bmi_bin, then the
# selected columns of BMIWHR_Biobank.
BMI17.Bonf <- cbind(BMIWHR_Biobank[, c("id", "bmi_bin")], BMIWHR_Biobank[, (names(BMIWHR_Biobank) %in% BMI17.Bonf)])

# Logistic regression of bmi_bin on one column `x` (coerced to numeric) plus
# the confounders. Returns the glm summary. Fitted once instead of twice.
LRM2_coef <- function(x) {
  summary(glm(
    formula = BMI17.Bonf$bmi_bin ~ as.numeric(x) + sex + house + mshs + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016,
    data = BMI17.Bonf, family = 'binomial'
  ))
}
lm2 <- lapply(BMI17.Bonf[, 3:length(BMI17.Bonf)], LRM2_coef)
coeff <- lapply(lm2, coefficients)

# Stack estimate (column 1) and p-value (column 4); newest table goes on top.
BMI17.Bonf.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names.coeff.i.. = names(coeff)[i], res)
  BMI17.Bonf.res <- rbind(res, BMI17.Bonf.res)
}
BMI17.Bonf.step2res <- BMI17.Bonf.res[grepl("as.numeric", rownames(BMI17.Bonf.res)), ]


BMI17.Bonf.step2 <- BMI17.Bonf.step2res
names(BMI17.Bonf.step2) <- c("varnames", "Estimate.step2", "p.step2")
BMI17.Bonf.step2 <- merge(BMI17.Bonf.step2, annotation, by.x = "varnames", by.y = "VarNames", all.x = TRUE)
# Drop variables that have no Group.Names entry in the annotation table.
BMI17.Bonf.step2 <- BMI17.Bonf.step2[!is.na(BMI17.Bonf.step2$Group.Names), ]

# calculate OR and 95%CI
# The SE is back-calculated from the p-value: z is approximated as
# -0.862 + sqrt(0.743 - 2.404 * log(p)) and SE = |estimate| / z.
BMI17.Bonf.step2$SE <- abs(BMI17.Bonf.step2$Estimate.step2) / (-0.862 + sqrt(0.743 - 2.404 * log(BMI17.Bonf.step2$p.step2))) #se of beta on log scale
BMI17.Bonf.step2$b.lb <- BMI17.Bonf.step2$Estimate.step2 - 1.96 * BMI17.Bonf.step2$SE
BMI17.Bonf.step2$b.ub <- BMI17.Bonf.step2$Estimate.step2 + 1.96 * BMI17.Bonf.step2$SE
BMI17.Bonf.step2$OR <- round(exp(BMI17.Bonf.step2$Estimate.step2), 3) #beta = log(OR)
BMI17.Bonf.step2$OR.LB <- round(exp(BMI17.Bonf.step2$b.lb), 3)
BMI17.Bonf.step2$OR.UB <- round(exp(BMI17.Bonf.step2$b.ub), 3)
# OR, OR.LB and OR.UB are already rounded to 3 decimals, so no second round().
BMI17.Bonf.step2$CI <- paste0(BMI17.Bonf.step2$OR, " (", BMI17.Bonf.step2$OR.LB, ", ", BMI17.Bonf.step2$OR.UB, ")")

write.csv(BMI17.Bonf.step2, 'BMI17_logreg_step2res.csv')


# 2) additionally adjusted for the time difference variable======
# Clear the workspace, then reload every table this part of the script reads.
rm(list = ls())
t1 <- read.csv(file = "Jane_20230116_combined dataset.csv", header = TRUE)
t2 <- read.csv(file = "Jane_20220511_combined dataset I.csv", header = TRUE)
t3 <- read.csv(file = "BMI_132.csv", header = TRUE)
t4 <- read.csv(file = "Jane_20201123_biobankchildqn.csv", header = TRUE)
# same file solved string problems
t5 <- read.csv(file = "Jane_20201029_Biobank_Parents copy.csv", header = TRUE)
t6 <- read.csv(file = "Jane_20201029_Biobank_anthropometrics.csv", header = TRUE)

# Variable dictionary, the covariates adjusted for in every model, and the
# exposure annotation that is merged into the result tables.
dictionary <- read.csv(file = "variables des_20220514.csv", header = TRUE, sep = ",")
Confounders <- c(
  "sex", "house", "mshs", "edum2016",
  "income", "hkm2016", "agemc2016"
)
annotation <- read.csv(file = "exposure_category_for tables.csv")

# results for selected exposures from step 1 using BMI as a continuous outcome
BMI11.res <- read.csv(file = "BMI11.res.csv", header = TRUE)
BMI17.res <- read.csv(file = "BMI17.res.csv", header = TRUE)


#11.5 yo ===============
#SEP
BMI132_SEP <- read.csv("BMI132_SEP.csv", header = TRUE, stringsAsFactors = TRUE)

# Add the columns of t3 (BMI_132.csv); the models below use bmi_132.x and
# age_bhw132 from the merged table.
BMI132_SEP <- merge(BMI132_SEP, t3, by = 'id')
sum(is.na(BMI132_SEP$age_bhw132), na.rm = TRUE)
# time difference is approximated as the age of BMI measurement 

# Variables flagged Bonf.sig == 1 in BMI11.res, plus the confounders.
BMI11.Bonf <- BMI11.res$names.coeff.i..[which(BMI11.res$Bonf.sig == 1)]
BMI11.Bonf <- c(BMI11.Bonf, Confounders)
BMI11.Bonf <- unique(BMI11.Bonf)
BMI11.Bonf1 <- cbind(BMI132_SEP[, c("id", "bmi_132.x", "age_bhw132")], BMI132_SEP[, (names(BMI132_SEP) %in% BMI11.Bonf)])  # baseline

# Regression (glm with its default family) of bmi_132.x on one column `x`
# (coerced to numeric) plus the confounders and age_bhw132. Returns the summary.
# The outcome is now referenced by its exact name: the original wrote
# BMI11.Bonf1$bmi, which only worked through `$` partial matching and would
# become NULL as soon as a second column starting with "bmi" is selected.
# The model is also fitted once instead of twice.
LRM2_coef <- function(x) {
  summary(glm(
    formula = BMI11.Bonf1$bmi_132.x ~ as.numeric(x) + sex + house + mshs + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016 + age_bhw132,
    data = BMI11.Bonf1
  ))
}
# NOTE(review): the range starts at column 2, so the outcome (bmi_132.x) and
# age_bhw132 are themselves run as "exposures"; confirm whether column 4 was
# intended as the first exposure.
lm2 <- lapply(BMI11.Bonf1[, 2:length(BMI11.Bonf1)], LRM2_coef)
coeff <- lapply(lm2, coefficients)

# Stack estimate (column 1) and p-value (column 4); newest table goes on top.
BMI11.Bonf1.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names.coeff.i.. = names(coeff)[i], res)
  BMI11.Bonf1.res <- rbind(res, BMI11.Bonf1.res)
}
BMI11.Bonf1.step2res <- BMI11.Bonf1.res[grepl("as.numeric", rownames(BMI11.Bonf1.res)), ]


# replace 'smoke_hyg','smoke_source','q29' because they should not adjust for 'mshs'
# The models are re-fitted on the subset below without 'mshs', and the matching
# rows of BMI11.Bonf1.step2res are then overwritten with the new results.
smoking <- c('bmi_132.x', Confounders, 'smoke_hyg', 'smoke_source', 'q29', 'age_bhw132')
BMI11.Bonf3 <- BMI11.Bonf1[, (names(BMI11.Bonf1) %in% smoking)]

# Regression (glm with its default family) of bmi_132.x on one column `x`
# (coerced to numeric) plus the covariates other than 'mshs' and age_bhw132.
# Returns the summary. Fitted once instead of twice.
LRM2_coef <- function(x) {
  summary(glm(
    formula = BMI11.Bonf3$bmi_132.x ~ as.numeric(x) + sex + house + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016 + age_bhw132,
    data = BMI11.Bonf3
  ))
}
lm2 <- lapply(BMI11.Bonf3[, 3:length(BMI11.Bonf3)], LRM2_coef)
coeff <- lapply(lm2, coefficients)

# Stack estimate (column 1) and p-value (column 4); newest table goes on top.
BMI11.Bonf3.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names.coeff.i.. = names(coeff)[i], res)
  BMI11.Bonf3.res <- rbind(res, BMI11.Bonf3.res)
}
BMI11.Bonf3.step2res <- BMI11.Bonf3.res[grepl("as.numeric", rownames(BMI11.Bonf3.res)), ]

# Overwrite the rows of the three smoking exposures, in the original order.
for (smoke_var in c("q29", "smoke_source", "smoke_hyg")) {
  BMI11.Bonf1.step2res[grep(smoke_var, BMI11.Bonf1.step2res$names.coeff.i..), ] <-
    BMI11.Bonf3.step2res[grep(smoke_var, BMI11.Bonf3.step2res$names.coeff.i..), ]
}




# survey1 
BMI132_survey1 <- read.csv("BMI132_survey1.csv", header = TRUE, stringsAsFactors = TRUE)
BMI132 <- c(names(BMI132_survey1), names(BMI132_SEP))
BMI132_var <- dictionary[(dictionary$VarNames %in% BMI132), ]
BMI132_survey1_t <- merge(BMI132_survey1, t1, by = "id")
BMI132_survey1_t1 <- merge(BMI132_survey1_t, t3, by = "id")

sum(is.na(BMI132_survey1_t1$date_rec), na.rm = TRUE)
sum(is.na(BMI132_survey1_t1$agey_bhw132), na.rm = TRUE)

# NOTE(review): both arguments of difftime() are date_rec, so survey1age_wks is
# 0 wherever the date parses and timediff reduces to -agey_bhw132. The second
# argument was presumably meant to be a birth-date column; that column is not
# identifiable from this script, so the computation is left unchanged.
BMI132_survey1_t1$survey1age_wks <- as.numeric(difftime(
  as.Date(BMI132_survey1_t1$date_rec, format = '%m/%d/%Y'),
  as.Date(BMI132_survey1_t1$date_rec, format = '%m/%d/%Y'),
  units = "weeks"
))
BMI132_survey1_t1$survey1age_yrs <- BMI132_survey1_t1$survey1age_wks * 0.019165  #converting weeks to yrs
BMI132_survey1_t1$timediff <- BMI132_survey1_t1$survey1age_yrs - BMI132_survey1_t1$agey_bhw132     #missing 3789


BMI11.Bonf2 <- cbind(BMI132_survey1_t1[, c("id", "bmi_132.x", "timediff")], BMI132_survey1_t1[, (names(BMI132_survey1_t1) %in% BMI11.Bonf)])  #survey1
BMI11.Bonf2.c <- cbind(BMI132_SEP[, "id"], BMI132_SEP[, (names(BMI132_SEP) %in% Confounders)])
names(BMI11.Bonf2)[1] <- "id"
names(BMI11.Bonf2.c)[1] <- "id"
BMI11.Bonf2 <- merge(BMI11.Bonf2, BMI11.Bonf2.c, by = "id")

# Linear regression of bmi_132.x on one column `x` (coerced to numeric) plus
# the confounders and timediff. Returns the lm summary.
# `na.rm = TRUE` was removed: lm() has no such argument (it only produced an
# "extra argument" warning on every fit); rows with missing values are already
# dropped by lm()'s default na.action. The model is also fitted once, not twice.
LRM2_coef <- function(x) {
  summary(lm(
    formula = BMI11.Bonf2$bmi_132.x ~ as.numeric(x) + sex + house + mshs + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016 + timediff,
    data = BMI11.Bonf2
  ))
}
lm2 <- lapply(BMI11.Bonf2[, 3:length(BMI11.Bonf2)], LRM2_coef)
coeff <- lapply(lm2, coefficients)

# Stack estimate (column 1) and p-value (column 4); newest table goes on top.
BMI11.Bonf2.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names.coeff.i.. = names(coeff)[i], res)
  BMI11.Bonf2.res <- rbind(res, BMI11.Bonf2.res)
}
BMI11.Bonf2.step2res <- BMI11.Bonf2.res[grepl("as.numeric", rownames(BMI11.Bonf2.res)), ]
dim(BMI11.Bonf2.step2res)
# Only the q11_2n* and q8b* exposures are kept from the survey1 models.
BMI11.Bonf2.step2res <- BMI11.Bonf2.step2res[grepl("q11_2n|q8b", BMI11.Bonf2.step2res$names.coeff.i..), ]

BMI11.Bonf.step2 <- rbind(BMI11.Bonf1.step2res, BMI11.Bonf2.step2res)
names(BMI11.Bonf.step2) <- c("varnames", "Estimate.step2", "p.step2")
BMI11.Bonf.step2 <- merge(BMI11.Bonf.step2, annotation, by.x = "varnames", by.y = "VarNames", all.x = TRUE)

write.csv(BMI11.Bonf.step2, 'BMI11_timediff.csv')



# 17.6 yo ========
BMIWHR_Biobank <- read.csv("BMIWHR_Biobank.csv", header = TRUE, stringsAsFactors = TRUE)
biobank_var <- names(BMIWHR_Biobank[9:length(BMIWHR_Biobank)])
BMIWHR_Biobank_var <- dictionary[(dictionary$VarNames %in% names(BMIWHR_Biobank)), ]
BMIWHR_Biobank_t <- merge(BMIWHR_Biobank, t1, by = 'id')

# NOTE(review): both arguments of difftime() are date_anthro, so age_anthro_wks
# (and age_anthro_yrs) is 0 wherever the date parses. The second argument was
# presumably meant to be a birth-date column; that column is not identifiable
# from this script, so the computation is left unchanged.
BMIWHR_Biobank_t$age_anthro_wks <- as.numeric(difftime(
  as.Date(BMIWHR_Biobank_t$date_anthro, format = '%m/%d/%Y'),
  as.Date(BMIWHR_Biobank_t$date_anthro, format = '%m/%d/%Y'),
  units = "weeks"
))
BMIWHR_Biobank_t$age_anthro_yrs <- BMIWHR_Biobank_t$age_anthro_wks * 0.019165  #converting weeks to yrs


# SEP 
# The first three assignments only produce the list for inspection; the
# hard-coded vector on the fourth line is what is actually used.
BMI17.Bonf <- BMI17.res$names.coeff.i..[which(BMI17.res$Bonf.sig == 1)]
BMI17.Bonf <- c(BMI17.Bonf, Confounders)
BMI17.Bonf <- unique(BMI17.Bonf) # look up, do not include biobank variables
BMI17.Bonf <- c("wt_fa2016", "wt_mo2016", "twin01", "mshs", "smoke_hyg", "bw", "sex", "house", "edum2016", "income", "hkm2016", "agemc2016")
BMI17.Bonf1 <- cbind(BMIWHR_Biobank_t[, c("id", "bmi", "age_anthro_yrs")], BMIWHR_Biobank_t[, (names(BMIWHR_Biobank_t) %in% BMI17.Bonf)])  # baseline

# Regression (glm with its default family) of bmi on one column `x` (coerced to
# numeric) plus the confounders and age_anthro_yrs. Returns the summary.
# Fitted once instead of twice.
LRM2_coef <- function(x) {
  summary(glm(
    formula = BMI17.Bonf1$bmi ~ as.numeric(x) + sex + house + mshs + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016 + age_anthro_yrs,
    data = BMI17.Bonf1
  ))
}
# NOTE(review): the range starts at column 2, so the outcome (bmi) and
# age_anthro_yrs are themselves run as "exposures"; confirm whether column 4
# was intended as the first exposure.
lm2 <- lapply(BMI17.Bonf1[, 2:length(BMI17.Bonf1)], LRM2_coef)
coeff <- lapply(lm2, coefficients)

# Stack estimate (column 1) and p-value (column 4); newest table goes on top.
BMI17.Bonf1.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names.coeff.i.. = names(coeff)[i], res)
  BMI17.Bonf1.res <- rbind(res, BMI17.Bonf1.res)
}
BMI17.Bonf1.step2res <- BMI17.Bonf1.res[grepl("as.numeric", rownames(BMI17.Bonf1.res)), ]


# replace 'smoke_hyg','smoke_source','q29' because they should not adjust for 'mshs'
# The models are re-fitted on the subset below without 'mshs', and the matching
# rows of BMI17.Bonf1.step2res are then overwritten with the new results.
# 'q29' is not part of the subset here, so its replacement step only does
# something if a matching row exists in both result tables.
smoking <- c('bmi', Confounders, 'smoke_hyg', 'smoke_source', 'age_anthro_yrs')
BMI17.Bonf3 <- BMI17.Bonf1[, (names(BMI17.Bonf1) %in% smoking)]

# Regression (glm with its default family) of bmi on one column `x` (coerced to
# numeric) plus the covariates other than 'mshs' and age_anthro_yrs. Returns
# the summary. Fitted once instead of twice.
LRM2_coef <- function(x) {
  summary(glm(
    formula = BMI17.Bonf3$bmi ~ as.numeric(x) + sex + house + income +
      edum2016 + agemc2016 + hkm2016 + hkm2016*edum2016 + age_anthro_yrs,
    data = BMI17.Bonf3
  ))
}
lm2 <- lapply(BMI17.Bonf3[, 3:length(BMI17.Bonf3)], LRM2_coef)
coeff <- lapply(lm2, coefficients)

# Stack estimate (column 1) and p-value (column 4); newest table goes on top.
BMI17.Bonf3.res <- data.frame()
for (i in seq_along(coeff)) {
  res <- rbind(coeff[[i]][, c(1, 4)])
  res <- data.frame(names.coeff.i.. = names(coeff)[i], res)
  BMI17.Bonf3.res <- rbind(res, BMI17.Bonf3.res)
}
BMI17.Bonf3.step2res <- BMI17.Bonf3.res[grepl("as.numeric", rownames(BMI17.Bonf3.res)), ]

# Overwrite the rows of the smoking exposures, in the original order.
for (smoke_var in c("q29", "smoke_source", "smoke_hyg")) {
  BMI17.Bonf1.step2res[grep(smoke_var, BMI17.Bonf1.step2res$names.coeff.i..), ] <-
    BMI17.Bonf3.step2res[grep(smoke_var, BMI17.Bonf3.step2res$names.coeff.i..), ]
}

names(BMI17.Bonf1.step2res) <- c("varnames", "Estimate.step2", "p.step2")
BMI17.Bonf1.step2res <- merge(BMI17.Bonf1.step2res, annotation, by.x = "varnames", by.y = "VarNames", all.x = TRUE)

write.csv(BMI17.Bonf1.step2res, 'BMI17_timediff.csv')



#For epigenome-wide association study  
library(sandwich)
library(lmtest)  # coeftest() is provided by lmtest, not by sandwich

# NOTE(review): `pheno` and `betas.clean` are not created anywhere in this
# script after the rm(list = ls()) above; they must be loaded before this
# section is run.

# BMI model for one CpG.
#   x: row index (name) into betas.clean.
# Returns one string "estimate,std.error,p.value" for the CpG term (row 2 of
# the coefficient test), using HC3 heteroskedasticity-consistent SEs.
f <- function(x) {
  m <- lm(BMI ~ betas.clean[x, ] + factor(hkm2016) + factor(sex) + inc + age2 + age3 +
            neupc + lympc + monpc + eospc + factor(assayid) + factor(mshs) +
            factor(edum2016), data = pheno)
  # Separate name for the test table so the argument `x` is not overwritten.
  ct <- coeftest(m, vcov = vcovHC(m, type = "HC3"))
  paste0(ct[2, 1], ",", ct[2, 2], ",", ct[2, 4])
}

aa <- read.csv("CpGs.csv")
names <- aa$X
beta_se <- lapply(names, f)
file <- cbind(names, as.vector(unlist(beta_se)))
write.csv(file, "BMI_cpg.csv")

# WHR model for one CpG; same covariates and same return format as above.
f <- function(x) {
  m <- lm(WHR ~ betas.clean[x, ] + factor(hkm2016) + factor(sex) + inc + age2 + age3 +
            neupc + lympc + monpc + eospc + factor(assayid) + factor(mshs) +
            factor(edum2016), data = pheno)
  ct <- coeftest(m, vcov = vcovHC(m, type = "HC3"))
  paste0(ct[2, 1], ",", ct[2, 2], ",", ct[2, 4])
}

# `aa` and `names` are unchanged since the BMI run, so CpGs.csv is not re-read.
beta_se <- lapply(names, f)
file <- cbind(names, as.vector(unlist(beta_se)))
write.csv(file, "WHR_cpg.csv")

library("bacon")
# `file` is a two-column character matrix (CpG name, "estimate,se,p" string),
# so `file$beta` / `file$se` cannot work: `$` is invalid on a matrix and no
# such columns exist. Split the combined string to recover the numbers.
# At this point `file` holds the most recently computed (WHR) results.
cpg_stats <- strsplit(as.character(file[, 2]), ",", fixed = TRUE)
es <- as.numeric(vapply(cpg_stats, `[`, character(1), 1L))  # effect sizes
se <- as.numeric(vapply(cpg_stats, `[`, character(1), 2L))  # standard errors
bc <- bacon(NULL, es, se)
